1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Text preprocessing configuration.
# Set each flag below to TRUE or FALSE to select the preprocessing steps to run.
# The flags are passed positionally, in this order, to preprocessText() further
# down in this script; the exact effect of each step is defined in
# src/text.preprocessing.R, which is not part of this file.
replace_special_chars <- TRUE
remove_duplicate_chars <- TRUE
replace_numbers <- TRUE
convert_to_lower_case <- TRUE
remove_default_stopWords <- TRUE
# NOTE(review): presumably applies to the stop-word list read from input
# port 2 - confirm in preprocessText()
remove_given_stopWords <- TRUE
stem_words <- TRUE
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Map 1-based optional input ports to variables
dataset1 <- maml.mapInputPort(1) # class: data.frame
# Text to preprocess: the "tweet_text" column of the first input data set
text_column <- dataset1[["tweet_text"]]
#label_column <- dataset1[["label_column"]]

# Read the stop-word list from the second input port. If reading fails, the
# error is printed and stopword_list is NULL so the script can continue.
# Warnings are printed from a calling handler and then muffled, so a warning
# raised while reading the port does not abort loading the list (an exiting
# `warning =` handler on tryCatch would discard it).
stopword_list <- tryCatch(
  withCallingHandlers(
    {
      dataset2 <- maml.mapInputPort(2) # class: data.frame
      # the stop-word list is the first column of the second input data set
      dataset2[[1]]
    },
    warning = function(war) {
      # report the warning, then resume evaluation where it was raised
      print(paste("WARNING: ", war))
      invokeRestart("muffleWarning")
    }
  ),
  error = function(err) {
    print(paste("ERROR: ", err))
    # the handler's return value becomes stopword_list
    NULL
  }
)
# The helper script arrives through the Zip port and is read from ./src/;
# it is expected to define preprocessText().
source("src/text.preprocessing.R")

# Apply the configured preprocessing steps to the text column.
# Arguments are matched by position, so their order must follow the
# signature of preprocessText().
text_column <- preprocessText(
  text_column,
  replace_special_chars,
  remove_duplicate_chars,
  replace_numbers,
  convert_to_lower_case,
  remove_default_stopWords,
  remove_given_stopWords,
  stem_words,
  stopword_list
)
# Label values for the output, read from the "sentiment_label" column of the
# first input data set.
# NOTE(review): "Sentinment" looks like a typo of "Sentiment". data.frame()
# derives the column name of an unnamed vector argument from the argument
# expression, so renaming this variable would rename the output column -
# confirm with downstream consumers before changing it.
Sentinment <- dataset1[["sentiment_label"]]
# Combine the labels with the preprocessed text; stringsAsFactors = FALSE
# prevents character columns from being converted to factors.
data.set <- data.frame(
Sentinment,
text_column,
stringsAsFactors = FALSE
)
# Select data.frame to be sent to the output Dataset port
maml.mapOutputPort("data.set")